Install & Load Packages

library(readr)
library(foreign)
library(plyr)
library(tidyverse)
library(psych)
library(labelled)
library(ggplot2)
library(plotly)

Load Data

mtwins = read.spss(file = 'mtwins_ecv_faces_amy.sav', header = F, to.data.frame=TRUE, use.value.labels = F)
nis = read.spss(file = 'Suarez NIS data 05172021.sav', header = T, to.data.frame=TRUE, use.value.labels = F)

Demographics

Read Value Labels

as.data.frame(attr(mtwins$twin_sex, 'value.labels'))
as.data.frame(attr(mtwins$pc_yrace, 'value.labels'))
as.data.frame(attr(mtwins$pc_education, 'value.labels'))
as.data.frame(attr(mtwins$pc_annincome, 'value.labels'))

Define Missing Values

mtwins[mtwins == 77] = NA
mtwins[mtwins == 88] = NA
mtwins[mtwins == -99] = NA

Descriptives

table(mtwins$twin_sex)
## 
##   0   1 
## 386 322
demos = select(mtwins, twin_age_yr, pc_yrace, pc_education, pc_annincome)
describe(demos)
apply((demos), 2, table)
## $twin_age_yr
## 
##   7   8   9  10  11  12  13  14  15  16  17  18  19 
##   6   4  12  38  36  58  62 146 150  96  80  18   2 
## 
## $pc_yrace
## 
##   0   1   2   3   4   5   6 
## 538  10  84   4   4  58  10 
## 
## $pc_education
## 
##   1   3   4   5   6   7   8 
##   2   8  52 156 122 232 134 
## 
## $pc_annincome
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12 
##   4   6  18  16  18  14  52  66  38  70  46  82 264

Twin Age: Histogram

mtwins %>%
  ggplot(aes(x = twin_age_yr)) + 
  geom_histogram(bins = 12, fill = 'white', col = 'black') + 
  labs(title = "Histogram of Twin Age",
       x = "Twin Age (in years)", y = "Count") + 
  theme_classic() + theme(axis.title.x = element_text(size = 12, family = "Times"), 
                          axis.title.y = element_text(size = 12, family = "Times"),
                          title = element_text(size = 12, face = 'bold', family = 'Times'))

Twin Race: Pie Chart

race = data.frame(
  group = c("White","Hispanic","Black","Native American/Native Alaskan","Asian/Pacific Islander","Biracial","Other"),
  value = c(538, 10, 84, 4, 4, 58, 10)
)

# Create plotly interactive pie chart
fig = plot_ly(race, 
               labels = ~group, 
               values = ~value, 
               type = 'pie', 
               textfont=list(size=18))

fig = fig %>% layout(title = list(text='Twin Race/Ethnicity', font=list(size=24)),
         xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE, font=list(size=18)),
         yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE, font=list(size=18)),
         showlegend=TRUE, legend=list(font=list(size=20)))

m = list(l = 50, r = 50, b = 100, t = 100, pad = 4)

fig = fig %>% layout(autosize = F, width = 750, height = 500, margin=m)
fig

Neighborhood Disadvantage

Neighborhood disadvantage is defined using geocoding of family addresses to assess the proportion of neighborhood residents living below the poverty line (10.5% was the average at the time of data collection) between 2011 and 2015 in each family’s census tract.

describe(mtwins$pov1115)
# How many families are living in neighborhoods where more than 10.5% of families in the neighborhood are living below the poverty line?
mtwins$belowPovLine <- ifelse(mtwins$pov1115 > .105, '>10.5% living below pov line', '<10.5% living below pov line')
table(mtwins$belowPovLine)
## 
## <10.5% living below pov line >10.5% living below pov line 
##                          254                          454
prop.table(table(mtwins$belowPovLine))
## 
## <10.5% living below pov line >10.5% living below pov line 
##                    0.3587571                    0.6412429

Michigan Census tract data

Load Data

michigan = read_csv("/Users/gabrielasuarez/Dropbox (University of Michigan)/MiND_Lab/Research_Projects/nhood_social_processes/Analysis/ACS_allCTs_2008-2012_families.csv")

Prep Data

# Rename the pov_2015 variable to pov1115
michigan$pov1115 = michigan$DP03_0119PE
michigan$pov1115 = michigan$pov1115/100
# Create two new dataframes only with the data that I need
michigan_dat = dplyr::select(michigan, pov1115)
mtwins_dat = dplyr::select(mtwins, pov1115)

# Make a new column in each dataframe that will be a variable to identify where they came from later
mtwins_dat$Group = 'MTwiNS Sample'
michigan_dat$Group = 'State of Michigan'

# Combine into new dataframe "nhood_pov"
nhoodPov = rbind(mtwins_dat, michigan_dat)

# Calculate the mean in each group
mu = ddply(nhoodPov, "Group", summarise, grp.mean=mean(pov1115, na.rm=T))

Overlaid Density Plots

ggplot(nhoodPov, aes(pov1115, fill = Group)) + 
  geom_density(alpha = 0.4, position = 'identity') +
  geom_vline(data=mu, aes(xintercept=grp.mean, color = Group), linetype="dashed", size= 1) +
  scale_color_brewer(palette="Set1") +
  scale_fill_brewer(palette="Set1") +
  labs(title = "American Community Survey (ACS) 5-Year Average",
       x = "Neighborhood Poverty (2011-2015)", y = "Density")+
  theme_light() + theme(axis.title.x = element_text(size = 12, family = 'Times'), 
                        axis.title.y = element_text(size = 12, family = 'Times'),
                        title = element_text(size = 12, face = 'bold', family = 'Times'))

Overlaid Histograms

palette1 = c("#009966","#56B4E9")

plot = ggplot(nhoodPov, aes(x = pov1115, fill = Group, color = Group)) +
  geom_histogram(binwidth = 0.05, alpha = 0.3, position = 'identity') +
  geom_vline(data=mu, aes(xintercept=grp.mean, color = Group), linetype="dashed", size=1) +
  theme(legend.position="bottom") +
  scale_color_manual(values=palette1) +
  scale_fill_manual(values=palette1) +
  scale_x_continuous(name="Percentage of families whose income is below the poverty level", breaks = seq(0, 1, 0.1)) +
  scale_y_continuous(name="Count", breaks = seq(0, 800, 100)) +
  coord_cartesian(xlim=c(0, 1), ylim=c(0, 800)) +
  #labs(title = "American Community Survey (ACS) 5-Year Average")+
  theme_light() + theme(axis.title.x = element_text(size = 12, face = "bold", family="Times"), 
                      axis.title.y = element_text(size = 12, face = "bold", family="Times"), 
                      legend.position = "top", 
                      legend.title = element_text(size = 12, face = "plain", family="Times"),
                      axis.text.x = element_text(size = 12),
                      axis.text.y = element_text(size = 12),
                      legend.text = element_text(size = 12, face = "bold", family="Times"))
plot

#ggsave("neighborhood_dist.png", plot=plot, width = 30, height = 20, unit = "cm", dpi=600)

Overlaid Histogram & Density Plot

ggplot(nhoodPov, aes(x = pov1115, fill = Group, color = Group))+
  geom_histogram(binwidth = 0.05, alpha = 0.6, aes(y = ..density..), position = 'identity')+
  geom_vline(data=mu, aes(xintercept=grp.mean, color = Group), linetype="dashed")+
  geom_density(alpha=0.1)+
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  labs(title = "American Community Survey (ACS) 5-Year Average", 
       x = "Neighborhood Poverty Rates (2011-2015)", y = "Density/Count")+
  theme_light() + theme(axis.title.x = element_text(size = 12, family = 'Times'), 
                        axis.title.y = element_text(size = 12, family = 'Times'),
                        title = element_text(size = 12, face = 'bold', family = 'Times'))